const crawler_file_tester = {

    robot_rules: [],

    /**
     * Parse the contents of the robots.txt file into testable regex rules.
     *
     * @param {string} result Raw robots.txt response body
     * @throws {string} When a line cannot be recognised
     */
    parse_robots_file: function(result){
        var rules = result.split("\n");
        $('#robots-check').addClass('text-success').append('<span class="glyphicon glyphicon-ok-circle"> </span>');

        var agent = '*';
        for(var r in rules){
            if( rules[r].length < 1 || rules[r].indexOf('#') === 0 || rules[r].toLowerCase().indexOf('sitemap:') >= 0 ){
                // Skip blank lines, comment lines and sitemap declarations
                continue;
            }else if( rules[r].toLowerCase().indexOf('user-agent:') >= 0 ){
                agent = rules[r].replace(/user-agent:/gi, '').replace(/^\s+|\s+$|\s+(?=\s)/g, '');
            }else if( rules[r].toLowerCase().indexOf('disallow:') >= 0 ){
                var rule =
                    '^'+rules[r]
                        .replace(/disallow:/gi, '')          // remove the directive name
                        .replace(/^\s+|\s+$|\s+(?=\s)/g, '') // trim surrounding white space
                        .replace(/\?/g, '\\?')               // escape query string markers
                        .replace(/\|/g, '\\|')               // escape pipes
                        .replace(/\//g, '\\/')               // escape every slash, not just the first
                        .replace(/^\^/, '')                  // drop a leading caret so the one we prepend is not doubled
                        .replace(/\*/g, '(.*?)');            // turn wildcards into match-anything groups
                crawler_file_tester.robot_rules.push({ 'rule': rule, 'agent': agent, 'original': rules[r] });
            }else{
                console.log(rules[r]);
                throw "Found a rule which we don't understand. Report it to the developer";
            }
        }
    },
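
    // Illustrative example (values assumed, not taken from a real crawl): the
    // robots.txt lines
    //     User-agent: *
    //     Disallow: /admin/
    // become one robot_rules entry whose regex source is ^\/admin\/ :
    //     { 'rule': '^\\/admin\\/', 'agent': '*', 'original': 'Disallow: /admin/' }
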
    /**
     * Check every tested URL and report any that are blocked by a rule in the robots file.
     *
     * @returns {undefined}
     */
    test_blocked_pages: function(){
        for(var t in crawler.tested){
            var url = crawler.tested[t];

            if( crawler.linked_from.hasOwnProperty(url) ) {
                for (var r in this.robot_rules) {
                    var regex = new RegExp(this.robot_rules[r]['rule']);
                    // Tested URLs appear to be stored without a leading slash,
                    // so prepend one before matching the path-anchored rule
                    if (regex.test('/' + url)) {
                        var link = crawler.painter.create_link(url, url),
                            status = crawler.painter.create_status('error', 'Page has links and is blocked in robots'),
                            agent = ( this.robot_rules[r]['agent'] == '*' ) ? 'ALL BOTS' : this.robot_rules[r]['agent'];
                        // Column order matches the headers registered for 'blocked_pages' below
                        crawler.painter.add_row(
                            'blocked_pages',
                            [link, crawler.linked_from[url].join(', '), agent, this.robot_rules[r]['original'], status]);
                    }
                }
            }
        }

        return undefined;
    },
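
    // Illustrative match (assumed values): with the ^\/admin\/ rule from the
    // example above, a tested URL of 'admin/login' is checked as '/admin/login',
    // which matches, so the page is reported as blocked for that rule's agent.
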
    /**
     * Set up an AJAX call to fetch a URL through the crawler's proxy.
     *
     * @param {string} url
     * @param {function} callback
     * @param {function} failed_callback
     */
    get_file_contents: function(url, callback, failed_callback){
        $.ajax({
            // Encode the target URL so its own query string survives as a parameter
            'url': crawler.get_proxy('/seotest/getPage?u='+encodeURIComponent(url)+'&agent='+crawler.agent)
        }).done(callback).fail(failed_callback);
    }
};
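
// A quick hand test (illustrative; assumes jQuery and the crawler globals are
// already on the page): feed the parser a tiny robots.txt and inspect the rules.
//
//     crawler_file_tester.parse_robots_file("User-agent: *\nDisallow: /admin/");
//     console.log(crawler_file_tester.robot_rules);
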
// Register the tests
crawler.event_handler.on('BEFORE_INIT', function(){
    crawler.regiser_test('blocked_pages', 'BLOCKED PAGES', ['URL', 'Linked From', 'Blocked For', 'Blocked By', 'Status'], false);
    crawler.painter.set_type('blocked_pages', 'default');
});

// Start up the file testers
crawler.event_handler.on('AFTER_INIT', function(){
    crawler_file_tester.get_file_contents(
        crawler.robots_url,
        crawler_file_tester.parse_robots_file,
        function(){ $('#robots-check').addClass('text-danger').append('<span class="glyphicon glyphicon-remove-circle"> </span>'); }
    );
    //crawler_file_tester.init_sitemap_tester();
});

// Test for blocked pages once the crawler finishes
crawler.event_handler.on('ALL_CRAWLS_FINISHED', function(){
    crawler_file_tester.test_blocked_pages();
});